******************************************************************
******************************************************************
*****                                                        *****
*****       Mona Morgan-Collins (King's College London)      *****
*****        Contact: mona.morgan-collins@dkcl.ac.uk         *****
*****                                                        *****
*****     (Letter) How Gap Measures Determine Results:       *****       
*****    The Case of Proportional Systems and the Gender     ***** 
*****                 Mobilization Gap                       *****
*****                                                        *****
*****        British Journal of Political Science            *****
*****                                                        *****
*****               Building all data sets                   *****
*****                                                        *****
******************************************************************
****************************************************************** 


* Written for Stata version 17.0
* Every path used below is relative ("./..."), so the working directory must be the
* folder that holds measure_raw.xlsx; all intermediate and final .dta files are written there too.
cd "./vt_repl" /* set your working directory */ 

* Install the boottest package from SSC (it is not called in this file;
* presumably it is needed by the analysis do-files - TODO confirm)
ssc install boottest, replace

*This file builds three final data sets
	*parl.dta replicates all results in parliamentary elections
	*munic1619bal.dta replicates all key results in local elections
	*munic1319bal.dta replicates some robustness analyses in local elections
	
*Note: the commands need to be run consecutively to ensure that data sets generated in earlier commands can be used for later commands


*****************************************************
*****************************************************
**          CREATING PARL.DTA                      **
*****************************************************
*****************************************************
****************************************************
** Replication code Cox, Smith and Fiva (2016)
** Loads the SMD-era sheet and builds "final" outcomes: the fut_* value
** (second round, if that exists) where one is recorded, otherwise the
** first-round value. Only round-1 rows are kept and saved.
import excel "./measure_raw.xlsx", sheet("coxfivasmith_smd") firstrow case(lower) clear

generate turnout_mun_final = cond(fut_turnout != ., fut_turnout, turnout_mun)                    /* second round turnout, if that exists */
generate totalvotes_final  = cond(fut_totalvotes_mun != ., fut_totalvotes_mun, totalvotes_mun)   /* second round total votes, if that exists */
generate marginf           = cond(fut_margin != ., fut_margin, margin)                           /* second round margin, if that exists */

keep if round == 1
save "./coxfivasmith_smd.dta", replace
****************************************************

** preparing sex-separated data for 1918 and 1915 to merge with coxfivasmith_smd.dta
** (sex-separated election data are not available in Cox, Fiva, Smith 2016).
** Both sheets receive identical treatment; 1918 is processed first and 1915
** last, so the 1915 file is the one left in memory when the loop ends.
local sexvars eligible1women eligible1men castedvotes1women castedvotes1men castedvotes2women castedvotes2men
foreach yy in 18 15 {
	import excel "./measure_raw.xlsx", sheet("yr`yy'") firstrow case(lower) clear

	* men = overall - women, for the electorate and for the votes cast in rounds 1 and 2
	foreach stem in eligible1 castedvotes1 castedvotes2 {
		generate `stem'men = `stem'overall - `stem'women
	}
	keep knr year `sexvars'

	* 9999999 is recoded to system missing
	* NOTE(review): this recode runs after the subtraction above, so a 9999999 in an
	* *overall or *women input has already entered the *men difference - confirm that
	* the raw sheets never combine the code with valid counts.
	foreach v of local sexvars {
		replace `v' = . if `v' == 9999999
	}
	save "./yr`yy'_mcox.dta", replace
}

** merging sex-separated data 1915 and 1918 with coxfivasmith_smd.dta
* stack the two election years into one municipality-year file
use "./yr18_mcox.dta", clear
append using "./yr15_mcox.dta"
save "./yr1518_mcox.dta", replace

* attach the sex-separated counts to the Cox, Fiva and Smith municipality-year rows
use "./coxfivasmith_smd.dta", clear
merge 1:1 knr year using "./yr1518_mcox.dta"

* Expected merge result (pasted log output, commented out so that the do-file runs):
*     Result                           # of obs.
*     -----------------------------------------
*     not matched                         1,387
*         from master                     1,373  (_merge==1) 
*         from using                         14  (_merge==2)   //7 split municipalities excluded in 1915 and 1918 by Cox et al 2016 
*
*     matched                             1,363  (_merge==3)
*     -----------------------------------------

* rows that exist only in the sex-separated file are discarded; master-only rows are kept
drop if _merge==2
drop _merge

save "./coxfivasmith_smd.dta", replace

** adding 1921 from Cox, Smith and Fiva 2016 replication files
import excel "./measure_raw.xlsx", sheet("coxfivasmith_21") firstrow case(lower) clear
save "./coxfivasmith_21.dta", replace

use "./coxfivasmith_smd.dta", clear
append using "./coxfivasmith_21.dta"
save "./parl.dta", replace

****************************************************
** Replication code Cox, Fiva and Smith 2016: dropping redistricted municipalities and multiple district municipalities
** valgkrets is between 101 and 2002 pre-reform and between 1 and 31 post-reform,
** so the per-municipality maximum is taken as the SMD district and the minimum as the PR district
egen SMD_district = max(valgkrets), by(knr)
egen PR_district  = min(valgkrets), by(knr)

** oslo (5), drammen (2), kristiansand (2) stavanger (2), bergen (4), trondheim (4)
** ... consist of multiple districts within a municipality. We do not have the possibility to get post-reform data on that level
** some municipalities are split after reform, so their SMD_district has to be set by hand
sort knr year
list knr SMD_district if SMD_district < 100

* manual assignments, grouped by target district
replace SMD_district =  101 if knr == 126                               /* 126 mysen was part of 125 eidsberg up to 1920 */
replace SMD_district =  203 if inlist(knr, 213, 222, 225, 228)          /* 213 ski was part of 212 kråkstad up to 1931; 222 nordre høland was part of 221 høland */
replace SMD_district =  406 if knr == 435
replace SMD_district =  902 if knr == 917
replace SMD_district = 1103 if inlist(knr, 1125, 1127, 1144)
replace SMD_district = 1104 if knr == 1151
replace SMD_district = 1105 if inlist(knr, 1156, 1158)
replace SMD_district = 1204 if inlist(knr, 1256, 1260, 1262)
replace SMD_district = 1404 if inlist(knr, 1435, 1436)
replace SMD_district = 1502 if knr == 1518
replace SMD_district = 1504 if knr == 1547
replace SMD_district = 1602 if inlist(knr, 1611, 1613)
replace SMD_district = 1603 if inlist(knr, 1637, 1639)
replace SMD_district = 1604 if inlist(knr, 1641, 1642, 1643)
replace SMD_district = 1703 if inlist(knr, 1739, 1740, 1741)
replace SMD_district = 1801 if knr == 1801
replace SMD_district = 1802 if inlist(knr, 1803, 1823, 1825, 1829, 1831)
replace SMD_district = 1804 if knr == 1854
replace SMD_district = 1805 if inlist(knr, 1856, 1861, 1863)
replace SMD_district = 1806 if inlist(knr, 1869, 1871, 1873)
replace SMD_district = 1901 if inlist(knr, 1912, 1913)
replace SMD_district = 1902 if inlist(knr, 1916, 1918, 1919)
replace SMD_district = 1903 if knr == 1923
replace SMD_district = 1904 if inlist(knr, 1939, 1940)
* no SMD_district is assigned for knr 301 (oslo), 602 (drammen), 1001, 1103, 1301, 1601 and 1701:
* the original replication code only carries commented-out placeholder lines for them

* <100: multiple-districts municipalities post-reform; >3000: multiple-districts municipalities pre-reform
* (a missing SMD_district also satisfies >3000 and is dropped here)
drop if SMD_district < 100 | SMD_district > 3000
drop if inlist(SMD_district, 690, 892, 1001)

/* Lillehammer, Hamar, Gjøvik and Kongsvinger split in two */
drop if inlist(valgkrets, 400, 490, 500)
/* extra district was added in Romsdal, and then municipalities appear to be reshuffled from 1915 to 1918 */
drop if inlist(valgkrets, 1501, 1502, 1503, 1504, 1505, 1506)
/* extra district was added in Troms, and then municipalities appear to be slightly reshuffled from 1915 to 1918 */
drop if inlist(valgkrets, 1901, 1902, 1903, 1904)
drop if knr == 1280 /* aarstad part of Bergen from 1.7.1915 */
****************************************************

** generating vote totals
** For the SMD elections (year<1920) the PR-era variables are filled from their SMD counterparts
replace eligibleoverall      = eligible1overall if year < 1920   /* eligibleoverall exists only for PR; eligible1overall is the first round electorate in SMD */
replace approvedvotesoverall = totalvotes_final if year < 1920   /* approvedvotesoverall exists only for PR; totalvotes_final is the final round no. of votes in SMD */
replace eligiblemen          = eligible1men     if year < 1920
replace eligiblewomen        = eligible1women   if year < 1920

* votes cast by sex: the round-2 count whenever it differs from zero, otherwise the round-1 count
* NOTE(review): a missing round-2 count also differs from zero, so it yields a missing total - confirm this is intended
generate castedvotesmen   = cond(castedvotes2men != 0, castedvotes2men, castedvotes1men)
generate castedvoteswomen = cond(castedvotes2women != 0, castedvotes2women, castedvotes1women)

* in 1921 votes cast are the sum of personal and mail votes
replace castedvotesmen   = personalvotesmen + mailmen    if year == 1921
replace castedvoteswomen = personalvotewomen + mailwomen if year == 1921

*dropping 1909/1912 (does not have merged sex-separated data)
drop if inlist(year, 1909, 1912)

save "./parl.dta", replace

** merging 1920 census 
* keep only the census counts used below and store them for both the parliamentary and the municipal build
import excel "./measure_raw.xlsx", sheet("cens20") firstrow case(lower) clear
keep knr adultwom15 adultman15 marwom15 intelwom intelmen factwom factmen
save "./cens20.dta", replace

* census values are constant within municipality, hence the many-to-one merge on knr
use "./parl.dta", clear
merge m:1 knr using "./cens20.dta" 	

* Expected merge result (pasted log output, commented out so that the do-file runs):
*     Result                           # of obs.
*     -----------------------------------------
*     not matched                            19
*         from master                         0  (_merge==1)   
*         from using                         19  (_merge==2)   //dropped 19 multi-district municipalities above
*
*     matched                             1,840  (_merge==3)
*     -----------------------------------------

* census-only rows carry no election data (SMD_district and year are missing);
* remove them so they do not form a spurious group in the collapse below
drop if _merge==2

** collapsing at pre-reform district with census controls 
* PR_district and the margins are averaged; electorates, votes, city and the census counts are summed within district-year
collapse (mean) PR_district margin marginf (sum) eligibleoverall eligiblemen eligiblewomen approvedvotesoverall castedvoteswomen castedvotesmen totalvotes_mun_v_a totalvotes_mun_h_fv totalvotes_mun_s totalvotes_mun_oth city adultwom15 adultman15 marwom15 intelwom intelmen factwom factmen, by(SMD_district year) 

***********************************
** replication code Cox, Fiva and Smith 2016
** keep only districts that appear in all three elections
xtset SMD_district year
bysort SMD_district: keep if _N == 3   // MMC changed to 3 (years 1915, 1918, 1921)
sort SMD_district year
**********************************

** generating sex-separated turnout and census variables
** elections are three years apart, so L3. refers to the previous election
generate turnoutm      = (castedvotesmen / eligiblemen) * 100
generate turnoutw      = (castedvoteswomen / eligiblewomen) * 100
generate ch_turnoutm   = turnoutm - L3.turnoutm
generate ch_turnoutw   = turnoutw - L3.turnoutw
generate turng         = turnoutw - turnoutm                                               // women-minus-men turnout gap
generate ch_turnoutg   = turng - L3.turng
generate turnoutwsh    = (castedvoteswomen / (castedvotesmen + castedvoteswomen)) * 100    // women's share of votes cast
generate ch_turnoutwsh = turnoutwsh - L3.turnoutwsh
generate Lturnoutm     = L3.turnoutm

generate votrat     = castedvoteswomen / castedvotesmen    // ratio of women's to men's votes
generate ch_votrat  = votrat - L3.votrat
generate turnrat    = turnoutw / turnoutm                  // ratio of women's to men's turnout
generate ch_turnrat = turnrat - L3.turnrat

generate fact20  = ((factmen + factwom) / (adultman15 + adultwom15)) * 100     // percentage industrial workers
generate intel20 = ((intelmen + intelwom) / (adultman15 + adultwom15)) * 100   // percentage intellectual workers
generate marw20  = (marwom15 / adultwom15) * 100                               // percentage married women over 15 years

* urban: the 1918 value of city, assigned to the 1921 row, with 2 and 3 recoded to 1,
* and then copied back onto the 1918 row
generate urban = L3.city if year == 1921
replace urban = 1 if inlist(urban, 2, 3)
replace urban = F3.urban if year == 1918

save "./parl.dta", replace



*****************************************************
*****************************************************
**  CREATING MUNIC1619BAL.DTA AND MUNIC1319BAL.DTA **
*****************************************************
*****************************************************

** 1913-1919 municipal elections: one sheet per election, each stamped with its year and saved
foreach yy in 13 16 19 {
	import excel "./measure_raw.xlsx", sheet("munic`yy'") firstrow case(lower) clear
	generate year = 19`yy'
	save "./munic`yy'.dta", replace
}

** appending the three election years into a municipality-year panel
** (preserve/restore leaves the plain 1919 file in memory once the panel is saved)
use "./munic19.dta", clear
preserve
append using "./munic16.dta"
append using "./munic13.dta"
xtset knr year
save "./munic1319.dta", replace
restore

** generating sex-separated turnout and gap variables
** elections are three years apart, so L3. refers to the previous election
use "./munic1319.dta", clear
xtset knr year

* turnout (%) by sex and its change since the previous election
generate turnoutm    = (votesm / electm) * 100
generate turnoutw    = (votesf / electf) * 100
generate ch_turnoutm = turnoutm - L3.turnoutm
generate ch_turnoutw = turnoutw - L3.turnoutw

* women-minus-men turnout gap
generate turng       = turnoutw - turnoutm
generate ch_turnoutg = turng - L3.turng

* women's share of votes cast (%)
generate turnoutwsh    = (votesf / (votesm + votesf)) * 100
generate ch_turnoutwsh = turnoutwsh - L3.turnoutwsh

* men's 1916 turnout, stored on the 1919 row and then copied onto the 1916 row
generate turnoutm16 = L3.turnoutm if year == 1919
replace turnoutm16 = F3.turnoutm16 if year == 1916

* ratio measures: women's to men's votes and women's to men's turnout
generate votrat     = votesf / votesm
generate ch_votrat  = votrat - L3.votrat
generate turnrat    = turnoutw / turnoutm
generate ch_turnrat = turnrat - L3.turnrat

* 1916 value of maj, stored on the 1919 row only
generate maj16 = L3.maj if year == 1919

** dropping inconsistent and incomplete data entries
** (these rows go only after the lagged variables above have been computed)
drop if elect < votes        // N=0: electorate smaller than valid votes
drop if electm < votesm      // N=1: male electorate smaller than male valid votes
drop if electf < votesf      // N=0: female electorate smaller than female valid votes

drop if knr == 805           // N=1: no election data in 1919

save "./munic1319.dta", replace
 
** merging with census20
* census values are constant within municipality, hence the many-to-one merge on knr
use "./munic1319.dta", clear
merge m:1 knr using "./cens20.dta" 	

* Expected merge result (pasted log output, commented out so that the do-file runs):
*     Result                           # of obs.
*     -----------------------------------------
*     not matched                            5
*         from master                        1  (_merge==1)   //knr 1280 
*         from using                         4  (_merge==2)   //knr 126, 1637, 1639, 805 (knr 805 excluded above) 
*
*     matched                             2,065  (_merge==3)
*     -----------------------------------------

* census-only rows have no election data and a missing year; without this line the
* row for knr 805 would remain in munic1319.dta (126, 1637 and 1639 are also dropped by knr below)
drop if _merge==2

gen fact20 = ((factmen + factwom) / (adultman15+adultwom15))*100    //generating percentage industrial workers
gen intel20 = ((intelmen + intelwom) / (adultman15+adultwom15))*100 //generating percentage intellectual workers
gen marw20 = (marwom15 / adultwom15)*100      //generating percentage married women over 15 years

* urban = 1 when the second-to-last digit of knr is 0, otherwise 0
gen ur = real(substr(string(knr, "%9.0f"), -2, 1))
gen urban = 0
replace urban=1 if ur==0	
drop ur

*dropping municipalities with changed boundaries between election years and 1920 census
drop if knr==1280 // merged into 1301 in 1915
drop if knr==1301 // acquired 1280 in 1915
drop if knr==126 //created in 1920 as a split from 125
drop if knr==125 //lost 126 in 1920
drop if knr==1637 //created in 1920 as a split from 1638
drop if knr==1639 //created in 1920 as a split from 1638
drop if knr==1638 //lost 1637 and 1639 in 1920
drop _merge 
save "./munic1319.dta", replace
 
*dif-in-diff data prep
* post marks 1919; treat marks maj16==1, which is only filled on 1919 rows,
* so it is carried back to 1916 and from there to 1913
generate post  = (year == 1919)
generate treat = (maj16 == 1)
xtset knr year
replace treat = F3.treat if year == 1916
replace treat = F3.treat if year == 1913
generate did    = post * treat
generate yr1619 = inlist(year, 1916, 1919)   // sample flag: 1916 and 1919 rows

*placebo data prep
* same treatment assignment, but 1916 plays the role of the post period
generate post_pl  = (year == 1916)
generate treat_pl = (maj16 == 1)
xtset knr year
replace treat_pl = F3.treat_pl if year == 1916
replace treat_pl = F3.treat_pl if year == 1913
generate did_pl = post_pl * treat_pl
generate yr1316 = inlist(year, 1913, 1916)   // sample flag: 1913 and 1916 rows

save "./munic1319.dta", replace
  
** preparing balanced sample for 1916-1919 (excluding municipalities that ceased to exist from one year to the other)
use "./munic1319.dta", clear
preserve
drop if year==1913
* npanel = number of rows per municipality; only municipalities with both 1916 and 1919 rows are kept
bys knr: gen npanel=_N
tab npanel
keep if npanel==2
save "./munic1619bal.dta", replace
restore   
  
** preparing balanced sample for 1916-1919 (dropping municipalities that changed boundaries anytime between 1916-1919)
import excel "./measure_raw.xlsx", sheet("redist1619") firstrow case(lower) clear
save "./redist1619.dta", replace

use "./munic1619bal.dta", clear
merge m:1 knr using "./redist1619.dta" 	

* Expected merge result (pasted log output, commented out so that the do-file runs):
*     Result                      Number of obs
*     -----------------------------------------
*     Not matched                         1,322
*         from master                     1,314  (_merge==1)
*         from using                          8  (_merge==2) 
*
*     Matched                                58  (_merge==3)
*     -----------------------------------------

bys year: tab _merge //29 matched in 1919; 29 in 1916	
drop if _merge==2  //dropping those excluded above 
drop if _merge==3  //dropping redistricted municipalities 1916-1919	
drop _merge
save "./munic1619bal.dta", replace
		
** preparing balanced sample 1913-1919 (e.g. excluding municipalities that ceased to exist from one year to the other)
use "./munic1319.dta", clear
preserve
* npanel = number of rows per municipality; only municipalities with 1913, 1916 and 1919 rows are kept
bys knr: gen npanel=_N
tab npanel
keep if npanel==3
save "./munic1319bal.dta", replace
restore  
 
** preparing balanced sample for 1913-1919 (dropping municipalities that changed boundaries anytime between 1913-1916 and 1916-1919)
* the 1913-1916 list (minus its year==1916 rows) is stacked on the 1916-1919 list saved earlier
* NOTE(review): the m:1 merge below requires knr to be unique in redist1319.dta - confirm for any updated raw data
import excel "./measure_raw.xlsx", sheet("redist1316") firstrow case(lower) clear
drop if year==1916
append using "./redist1619.dta" 	
save "./redist1319.dta", replace

use "./munic1319bal.dta", clear
merge m:1 knr using "./redist1319.dta" 	

* Expected merge result (pasted log output, commented out so that the do-file runs):
*     Result                      Number of obs
*     -----------------------------------------
*     Not matched                         1,880
*         from master                     1,854  (_merge==1)
*         from using                         26  (_merge==2)
*
*     Matched                               156  (_merge==3)
*     -----------------------------------------

bys year: tab _merge //52 matched in each year	
drop if _merge==2  //dropping those excluded above 
drop if _merge==3  //dropping redistricted municipalities 1913-1919	
drop _merge
save "./munic1319bal.dta", replace
 
** balanced sample for 1913-1919: flagging municipalities that switched to PR in 1916
** (pr1316 only marks them; no rows are dropped in this step)
save "./munic1319bal.dta", replace
xtset knr year
* 1 on the 1916 row when maj is 0 in 1916 and was 1 in 1913, 0 otherwise
generate pr1316 = (maj == 0 & L3.maj == 1 & year == 1916)   // identifying 42 municipalities
* copy the 1916 flag onto the municipality's 1913 and 1919 rows
replace pr1316 = F3.pr1316 if year == 1913
replace pr1316 = L3.pr1316 if year == 1919
save "./munic1319bal.dta", replace
 
 
